#------------------------------------------------------------------------------------------------#
# Replication Code: Analysis
# Article: How Men React to Shifts in Gender Representation: Ignored Interests? Good for Society?
# Journal: Political Research Quarterly
# Author: Verena Reidinger
# July 25
#------------------------------------------------------------------------------------------------#

#---------------------------------#
####     0. Preparations       ####
#---------------------------------#

# loading packages
library(tidyverse)
library(ggpubr)
library(xtable)
library(emmeans)
library(sandwich)
library(lmtest)
library(texreg)

# load datasets for analysis of male respondents
load("resp_dat_male.RData") # respondent level data of male resp; named "mal"
load("out_dat.RData") # rating level data for output expectations, named "o"
load("fair_dat.RData") # rating level data for fairness perceptions, named "f"

# merge "mal" and "o" to estimate models on men's output expectations
mal_o <- merge(mal, o, by = "ResponseId", all.x = T) # add "all.x" true. this way only male respondents are included in the dataset
mal_f <- merge(mal, f, by = "ResponseId", all.x = T) # add "all.x" true. this way only male respondents are included in the dataset

# Create another set of attribute level variables with more readable labels in the output expectations dataset
mal_o$Gender <- mal_o$gender
mal_o$Gender <- recode(mal_o$Gender, 
                    gender_womenhigh = "65% Women\n35% Men", 
                    gender_womenequal = "50% Women\n50% Men", 
                    gender_womenlow = "20% Women\n80% Men")
class(mal_o$Gender)

mal_o$Gender <- relevel(mal_o$Gender, ref = "50% Women\n50% Men") # change the reference group to equal levels of gender representation 50%/50%

mal_o$Ideology <- mal_o$ideology
mal_o$Ideology <- recode(mal_o$ideology, 
                      ideology_righthigh = "40% Left-wing\n60% Right-wing", 
                      ideology_equal = "50% Left-wing\n50% Right-wing", 
                      ideology_lefthigh = "60% Left-wing\n40% Right-wing")
class(mal_o$Ideology)

# Create another set of attribute level variables with more readable labels in the fairness perceptions dataset

mal_f$Gender <- mal_f$gender
mal_f$Gender <- recode(mal_f$Gender, 
                       gender_womenhigh = "65% Women\n35% Men", 
                       gender_womenequal = "50% Women\n50% Men", 
                       gender_womenlow = "20% Women\n80% Men")
class(mal_f$Gender)

mal_f$Gender <- relevel(mal_f$Gender, ref = "50% Women\n50% Men") # change the reference group to equal levels of gender representation 50%/50%

mal_f$Ideology <- mal_f$ideology
mal_f$Ideology <- recode(mal_f$ideology, 
                         ideology_righthigh = "40% Left-wing\n60% Right-wing", 
                         ideology_equal = "50% Left-wing\n50% Right-wing", 
                         ideology_lefthigh = "60% Left-wing\n40% Right-wing")
class(mal_f$Ideology)

#------------------------------------------------------------------------------------------------#
####          1. Figure 2 (Graph on the left) and the underlying model in Table A7            ####
#------------------------------------------------------------------------------------------------#

# Figure 2 uses one item on output expectations --- question: decision in favour of people like you

# Fit a linear model predicting REP_RAT based on Gender
TA7 <- lm(REP_RAT ~ Gender, data = mal_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA7, cluster = ~ResponseId)

# Perform a hypothesis test (t-test) on the coefficients using the clustered standard errors
coeftest(TA7, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA7)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A7
# Generate output for table A7 
screenreg(list(TA7),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA7), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A7; Output expectations; Outcome 1"
)

# To plot these effects and generate Figure 2 I use the emmeans package
# However, to use the robust/clustered standard errors with the emmeans package 
# I need to adjust them to the emmeans output manually. See the following steps: 

# Estimate marginal means for each level of women's representation
em <- emmeans(TA7, ~ Gender)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmeans summary for correct names 
names(em_summary)

# Extract the linear function (contrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 64
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmeans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df

# Compute the critical t-value for a 95% confidence interval
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limits using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary

# After adjusting the standard errors I continue with changing the levels of the treatment to increase the distance between 20 and 50% women
# This way the distance between 20 and 50% women is larger than the distance between 50% and 65% women in Figure 2

em_summary_relevel <- em_summary 
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "20% Women\n80% Men"] <- 0 # 20% women as the lowest level
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "50% Women\n50% Men"] <- 2 # 50% women; moving two steps instead of 1
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "65% Women\n35% Men"] <- 3 # 65% women; moving 1 step from 50% 

#Create Figure 2 (left)
FIG2_LEFT <- ggplot(em_summary_relevel, aes(x = Gender_Nlevel, y = emmean)) +  
  # Create a ggplot object using 'em_summary_relevel' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean'
  
  geom_point(position = position_dodge(0.2), shape = 1, size = 4.5) +  
  # Add points to represent estimated marginal means
  # Use open circles (shape = 1) and dodge position to separate overlapping points
  
  geom_line(position = position_dodge(0.2), size = 0.7, linetype = 2) +  
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), linewidth = 0.8,
                width = 0.04, position = position_dodge(0.4)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(
    y = "How much will the assembly's decisions be in the interest of people like you?",  
    title = "Men's Output Expectations"
  ) +  
  # Add axis labels and plot title

  theme_minimal() +  
  # Use a clean, minimal theme for the plot
  
  theme(
    axis.line.x = element_line(linewidth = 0.2, colour = "black"),  
    # Add a thin black line to the x-axis
    
    axis.line.y = element_line(linewidth = 0.2, colour = "black"),  
    # Add a thin black line to the y-axis
    
    axis.ticks =  element_line(linewidth = 0.3, colour = "black"),  
    # Add black tick marks on both axes
    
    axis.text.x = element_text(colour = "black", size = 11),  
    # Make x-axis labels text black and increase font size for readability
    
    axis.title.x = element_blank(),  
    # Remove x-axis title
    
    axis.title.y = element_text(size = 14), 
    # Set font size of y axis title
    
    axis.text.y = element_text(size = 11), 
    # Increase y-axis labels font size for readability
    
    plot.title = element_text(hjust = 0.5, size = 14),  
    # Center the plot title horizontally
  ) +
  
  scale_x_continuous(
    breaks = c(0, 2, 3),  
    # Set specific breaks on the x-axis corresponding to coded levels
    
    labels = c("20% Women\n80% Men", "50% Women\n50% Men", "65% Women\n35% Men"),  
    # Customize labels for each x break, using line breaks (\n) for formatting
    
    expand = c(0, 1), 
    # Prevent the axis from expanding beyond the data range unnecessarily
    
    minor_breaks = NULL  
    # Remove minor grid lines for a cleaner look
  ) +
  
  scale_y_continuous(limits = c(4.6, 5.3))  
# Set fixed y-axis limits to control visual scale and comparison

#FIGURE 2 - LEFT
# Generate FIGURE 2 (left)
FIG2_LEFT

# remove all objects generated in the process of adjusting cluster robust standard errors
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))


#------------------------------------------------------------------------------------------------#
####        2. Figure 2 (Graph on the right) and the underlying model in Table A10            ####
#------------------------------------------------------------------------------------------------#

# Figure 2 uses one item on fairness perceptions --- question: decision will be fair

# Fit a linear model predicting COND_RAT based on Gender
TA10 <- lm(COND_RAT ~ Gender, data = mal_f)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA10, cluster = ~ResponseId)

# Perform a hypothesis test (t-test) on the coefficients using the clustered standard errors
coeftest(TA10, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA10)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A10
# Generate output for table A10
screenreg(list(TA10),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA10), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A10; Fairness Perceptions; Outcome 1"
)

# To plot these effects and generate Figure 2 I use the emmeans package
# However, to use the robust/clustered standard errors with the emmeans package 
# I need to adjust them to the emmeans output manually. See the following steps: 

# Estimate marginal means (aka least-squares means) for each level of women's representation
em <- emmeans(TA10, ~ Gender)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmeans summary for correct names 
names(em_summary)

# Extract the linear function (contrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 239
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmeans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df

# Compute the critical t-value for a 95% confidence interval
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limits using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary

# After adjusting the standard errors I continue with changing the levels of the treatment to increase the distance between 20 and 50% women
# This way the distance between 20 and 50% women is larger than the distance between 50% and 65% women in Figure 2

em_summary_relevel <- em_summary 
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "20% Women\n80% Men"] <- 0 # 20% women as the lowest level
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "50% Women\n50% Men"] <- 2 # 50% women; moving two steps instead of 1
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "65% Women\n35% Men"] <- 3 # 65% women; moving 1 step from 50% 


FIG2_RIGHT <- ggplot(em_summary_relevel, aes(x = Gender_Nlevel, y = emmean)) +
  # Create a ggplot object using 'em_summary_relevel' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean'
  
  geom_point(position = position_dodge(0.2), size = 4.5, shape = 0) +  
  # Add points to represent estimated marginal means
  # Use open square (shape = 0) and dodge position to separate overlapping points
  
  geom_line(position = position_dodge(0.2), size = 0.7, linetype = 2) +
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), linewidth = 0.8,
                width = 0.04, position = position_dodge(0.4)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(y = "How fair will the assembly's decisions be?", 
       title = "Men's Fairness Perceptions", 
  ) +
  # Add axis labels and plot title

  theme_minimal() +
  # Use a clean, minimal theme for the plot
  
  theme(axis.line.x = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the x-axis
        
        axis.line.y = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the y-axis
        
        axis.ticks =  element_line(linewidth = 0.3, colour = "black"), 
        # Add black tick marks on both axes
        
        axis.text.x = element_text(colour = "black", size = 11),
        # Make x-axis labels text black and increase font size for readability
        
        axis.title.x = element_blank(), 
        # Remove x-axis title
        
        axis.title.y = element_text(size = 14), 
        # Set font size of y axis title
        
        axis.text.y = element_text(size = 11), 
        # Increase y-axis labels font size for readability
        
        plot.title = element_text(hjust = 0.5, size = 14), 
        # Center the plot title horizontally
        
        ) +
  
  scale_x_continuous(breaks = c(0, 2, 3), 
                     # Set specific breaks on the x-axis corresponding to coded levels
                     
                     labels = c("20% Women\n80% Men", "50% Women\n50% Men", "65% Women\n35% Men"),
                     # Customize labels for each x break, using line breaks (\n) for formatting
                     
                     expand = c(0, 1), 
                     # Prevent the axis from expanding beyond the data range unnecessarily
                     
                     minor_breaks = NULL) +
                      # Remove minor grid lines for a cleaner look
  
  scale_y_continuous(limits = c(4.6, 5.3))
# Set fixed y-axis limits to control visual scale and comparison

FIG2_RIGHT

#FIGURE 2
FIG2 <- ggarrange(FIG2_LEFT, FIG2_RIGHT)
FIG2

ggsave("Output_Figures/FIG2.png", width = 33, height = 22, scale = 0.95, units = "cm", bg = "white")

# Remove all the objects that were created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))


#------------------------------------------------------------------------------------------------#
####        3. Figure 3 (Graph on the left) and the underlying model in Table A13             ####
#------------------------------------------------------------------------------------------------#

# Figure 3 uses one item on output expectations --- question: decisions in your interest

# Fit a linear model predicting REP_RAT based on Gender in interaction with attitudes toward women's advances in business
TA13 <- lm(REP_RAT ~ Gender*THR_WOM_split, data = mal_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA13, cluster = ~ResponseId)

# Perform a hypothesis test (t-test) on the coefficients using the clustered standard errors
coeftest(TA13, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA13)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A13
# Generate output for table A13
# IMPORTANT INFORMATION: The item on the item for attitudes toward women's advances in business has been recoded, so that higher values indicate a more negative view toward women's advances in business (has undermined)
# This was previously done so that higher values indicate a higher social identity as men (see H4a and H5b in the Pre-Reg) and more negative attitudes toward gender equality advances
screenreg(list(TA13),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA13), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A13; Output expectations x Attitudes"
)

# To plot these effects and generate Figure 3 I use the emmeans package
# To use the robust/clustered standard errors with the emmeans package 
# the SEs need to be adjusted in the emmeans output manually. See the following steps: 

# Estimate marginal means (aka least-squares means) for each level of women's rep * attitudes gender equality
em <- emmeans(TA13, ~ Gender*THR_WOM_split)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmeans summary for correct names 
names(em_summary)

# Extract the linear function (contrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 401
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmeans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df

# Compute the critical t-value for a 95% confidence interval
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limits using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary

# After adjusting the standard errors I continue with changing the levels of the treatment to increase the distance between 20 and 50% women
# This way the distance between 20 and 50% women is larger than the distance between 50% and 65% women in Figure 3

em_summary_relevel <- em_summary 
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "20% Women\n80% Men"] <- 0
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "50% Women\n50% Men"] <- 2
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "65% Women\n35% Men"] <- 3

FIG3_LEFT <- ggplot(em_summary_relevel, aes(x = Gender_Nlevel, y = emmean, color = THR_WOM_split, group = THR_WOM_split)) +
  # Create a ggplot object using 'em_summary_relevel' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean', the group is neg. and pos. attitudes toward gender equality
  
  geom_point(position = position_dodge(0.2), size = 3) +  
  # Add points to represent estimated marginal means

  geom_line(position = position_dodge(0.2), size = 0.6, linetype = 2) +
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), size = 0.8,
                width = 0.04, position = position_dodge(0.2)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(y = "How much will the assembly's decisions be in the interest of people like you?", 
       title = "Men's Output Expectations", 
  ) +
  # Add axis labels and plot title
  
  theme_minimal() +
  # Use a clean, minimal theme for the plot
  
  theme(legend.position = "top", 
        # Place legend at the top of the graph
    
        axis.line.x = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the x-axis
        
        axis.line.y = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the y-axis
        
        axis.ticks =  element_line(linewidth = 0.3, colour = "black"), 
        # Add black tick marks on both axes
        
        axis.text.x = element_text(colour = "black", size = 11),
        # Make x-axis labels text black and increase font size for readability
        
        axis.title.x = element_blank(), 
        # Remove x-axis title
        
        axis.title.y = element_text(size = 14), 
        # Set font size of y axis title
        
        axis.text.y = element_text(size = 11), 
        # Increase y-axis labels font size for readability
        
        legend.text = element_text(size = 11), 
        # Increase font size of legend labels
        
        legend.title = element_text(size = 11), 
        # Increase font size of legend title
        
        plot.title = element_text(hjust = 0.5, size = 14), 
        # Center the plot title horizontally
        
  ) +
  scale_colour_manual(values = c("#000000", "#909497"), 
                      labels = c("0-3" = "Enriched Life in UK", "4-6" = "Undermined Life in UK")) +
  # Set colors for the two groups and change the labels so it is clear that values from 0-3 represent male respondents who reported that women's advances in business enriches life in the UK while respondents scoring 4 or higher think women's advances undermine life in the UK; 
  
  guides(
    color = guide_legend(title = "Attitudes toward Gender Equality\nMeasures in Business")
  ) +
  # Provide a title for the legend
  
  scale_x_continuous(breaks = c(0, 2, 3), 
                     # Set specific breaks on the x-axis corresponding to coded levels
                     
                     labels = c("20% Women\n80% Men", "50% Women\n50% Men", "65% Women\n35% Men"),
                     # Customize labels for each x break, using line breaks (\n) for formatting
                     
                     expand = c(0, 1), 
                     # Prevent the axis from expanding beyond the data range unnecessarily
                     
                     minor_breaks = NULL)
  # Remove minor grid lines for a cleaner look
  
FIG3_LEFT

# Remove all the objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))

#------------------------------------------------------------------------------------------------#
####        4. Figure 3 (Graph on the right) and the underlying model in Table A14            ####
#------------------------------------------------------------------------------------------------#

# Figure 3 uses one item on the fairness perceptions --- fairness of decisions

# Fit a linear model predicting COND_RAT based on Gender in interaction with the attitudes toward women's advances in business
TA14 <- lm(COND_RAT ~ Gender*THR_WOM_split, data = mal_f)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA14, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors 
coeftest(TA14, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA14)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A14
# Generate output for table A14
# IMPORTANT INFORMATION: The item on the item for attitudes toward women's advances in business has been recoded, so that higher values indicate a more negative view toward women's advances in business (has undermined)
# This was previously done so that higher values indicate a higher social identity as men (see H4a and H5b in the Pre-Reg) and more negative attitudes toward gender equality advances
screenreg(list(TA14),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA14), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A14; Fairness perceptions x Attitudes"
)

# To plot these effects and generate Figure 3 I use the emmeans package
# To use the robust/clustered standard errors with the emmeans package 
# the SEs need to be adjusted in the emmeans output manually. See the following steps: 

# Estimate marginal means (aka least-squares means) for each level of women's rep * attitudes gender equality
em <- emmeans(TA14, ~ Gender*THR_WOM_split)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmans summary for the correct names
names(em_summary)

# Extract the linear function (constrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 554
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df 

# Compute the critical t-value for the 95% confidence interval 
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limites using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary

# After adjusting the standard errors I continue with changing the levels of the treatment to increase the distance between 20 and 50% women
# This way the distance between 20 and 50% women is larger than the distance between 50% and 65% women in Figure 3

em_summary_relevel <- em_summary 
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "20% Women\n80% Men"] <- 0
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "50% Women\n50% Men"] <- 2
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "65% Women\n35% Men"] <- 3

FIG3_RIGHT <- ggplot(em_summary_relevel, aes(x = Gender_Nlevel, y = emmean, 
                                             color = THR_WOM_split, group = THR_WOM_split)) +
  # Create a ggplot object using 'em_summary_relevel' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean', the group is neg. and pos. attitudes toward gender equality
  
  geom_point(position = position_dodge(0.2), size = 3) +  
  # Add points to represent estimated marginal means

  geom_line(position = position_dodge(0.2), size = 0.6, linetype = 2) +
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), size = 0.8,
                width = 0.04, position = position_dodge(0.2)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(y = "How fair will the assembly's decisions be?", 
       title = "Men's Fairness Perceptions", 
  ) +
  # Add axis labels and plot title
  
  theme_minimal() +
  # Use a clean, minimal theme for the plot
  
  theme(legend.position = "top", 
        # Place legend at the top of the graph
        
        axis.line.x = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the x-axis
        
        axis.line.y = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the y-axis
        
        axis.ticks =  element_line(linewidth = 0.3, colour = "black"), 
        # Add black tick marks on both axes
        
        axis.text.x = element_text(colour = "black", size = 11),
        # Make x-axis labels text black and increase font size for readability
        
        axis.title.x = element_blank(), 
        # Remove x-axis title
        
        axis.title.y = element_text(size = 14), 
        # Set font size of y axis title
        
        axis.text.y = element_text(size = 11), 
        # Increase y-axis labels font size for readability
        
        legend.text = element_text(size = 11), 
        # Increase font size of legend labels
        
        legend.title = element_text(size = 11), 
        # Increase font size of legend title
        
        plot.title = element_text(hjust = 0.5, size = 14), 
        # Center the plot title horizontally
        
  ) +
  scale_colour_manual(values = c("#000000", "#909497"), 
                      labels = c("0-3" = "Enriched Life in UK", "4-6" = "Undermined Life in UK")) +
  # Set colors for the two groups and change the labels so it is clear that values from 0-3 represent male respondents who reported that women's advances in business enriches life in the UK while respondents scoring 4 or higher think women's advances undermine life in the UK; 
  
  guides(
    color = guide_legend(title = "Attitudes toward Gender Equality\nMeasures in Business")
  ) +
  # Provide a title for the legend
  
  scale_x_continuous(breaks = c(0, 2, 3), 
                     # Set specific breaks on the x-axis corresponding to coded levels
                     
                     labels = c("20% Women\n80% Men", "50% Women\n50% Men", "65% Women\n35% Men"),
                     # Customize labels for each x break, using line breaks (\n) for formatting
                     
                     expand = c(0, 1), 
                     # Prevent the axis from expanding beyond the data range unnecessarily
                     
                     minor_breaks = NULL)
# Remove minor grid lines for a cleaner look

FIG3_RIGHT

#FIGURE 3
# Generate Figure 3
FIG3 <- ggarrange(FIG3_LEFT, FIG3_RIGHT, common.legend = T)
FIG3

ggsave("Output_Figures/FIG3.png", width = 33, height = 22, scale = 0.95, units = "cm", bg = "white")

# Remove all the objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))


#------------------------------------------------------------------------------------------------#
####                       5. Sample Information & Diagnostics                                ####
#------------------------------------------------------------------------------------------------#

# To replicate the sample information and diagnostistics please run Rep_Diagnostics.R

#------------------------------------------------------------------------------------------------#
####              6. Further results on output expectations: Tables A8 and A9                 ####
#------------------------------------------------------------------------------------------------#

# Results of linear regression model using the second pre-registered outcome measure: “How much do you believe this assembly’s decisions will make you better off in the future?”

# Fit a linear model predicting STAT_RAT based on Gender
TA8 <- lm(STAT_RAT ~ Gender, data = mal_o)

# Compute robust (clustered) standard erros by clustering on ResponseId
cluster_se <- vcovCL(TA8, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA8, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA8)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A8 
# Generate output for table A8
screenreg(list(TA8),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA8), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A8; Output expectations; Outcome 2"
)

rm(list = c("cluster_se", "cluster_se_values", "coef_values"))


# Results of linear regression model for output expectations of female respondents
# First load the data on female respondents
load("resp_dat_female.RData") # respondent level data of female resp; named "fem"

# merge "fem" and "o" to estimate models on women's output expectations
fem_o <- merge(fem, o, by = "ResponseId", all.x = T) # add "all.x" true. this way only female respondents are included in the dataset
fem_f <- merge(fem, f, by = "ResponseId", all.x = T) # add "all.x" true. this way only female respondents are included in the dataset

# Create another set of attribute level variables with more readable labels in the output expectations dataset
fem_o$Gender <- fem_o$gender
fem_o$Gender <- recode(fem_o$Gender, 
                       gender_womenhigh = "65% Women\n35% Men", 
                       gender_womenequal = "50% Women\n50% Men", 
                       gender_womenlow = "20% Women\n80% Men")
class(fem_o$Gender)

fem_o$Gender <- relevel(fem_o$Gender, ref = "50% Women\n50% Men") # change the reference group to equal levels of gender representation 50%/50%

fem_o$Ideology <- fem_o$ideology
fem_o$Ideology <- recode(fem_o$ideology, 
                         ideology_righthigh = "40% Left-wing\n60% Right-wing", 
                         ideology_equal = "50% Left-wing\n50% Right-wing", 
                         ideology_lefthigh = "60% Left-wing\n40% Right-wing")
class(fem_o$Ideology)

# Create another set of attribute level variables with more readable labels in the fairness perceptions dataset
fem_f$Gender <- fem_f$gender
fem_f$Gender <- recode(fem_f$Gender, 
                       gender_womenhigh = "65% Women\n35% Men", 
                       gender_womenequal = "50% Women\n50% Men", 
                       gender_womenlow = "20% Women\n80% Men")
class(fem_f$Gender)

fem_f$Gender <- relevel(fem_f$Gender, ref = "50% Women\n50% Men") # change the reference group to equal levels of gender representation 50%/50%

fem_f$Ideology <- fem_f$ideology
fem_f$Ideology <- recode(fem_f$ideology, 
                         ideology_righthigh = "40% Left-wing\n60% Right-wing", 
                         ideology_equal = "50% Left-wing\n50% Right-wing", 
                         ideology_lefthigh = "60% Left-wing\n40% Right-wing")
class(fem_f$Ideology)

# Fit a linear model predicting REP_RAT based on Gender representation for female respondents
TA9 <- lm(REP_RAT ~ Gender, data = fem_o)

# Compute robust (clustered) standard erros by clustering on ResponseId
cluster_se <- vcovCL(TA9, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA9, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA9)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A9 
# Generate output for table A9
screenreg(list(TA9),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA9), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A9; Output expectations; Female Resp"
)

# Remove the objects created in the process of estimating robust standard errors
rm(list = c("cluster_se", "cluster_se_values", "coef_values"))


#------------------------------------------------------------------------------------------------#
####             7. Further results on fairness perceptions: Tables A11 and A12               ####
#------------------------------------------------------------------------------------------------#

# Results of linear regression model using the second pre-registered outcome measure: “In your opinion, how unfair do you think could the assembly’s decisions be to some groups?”

# Fit a linear model predicting RECOG_RAT based on Gender representation
TA11 <- lm(RECOG_RAT ~ Gender, data = mal_f)

# Compute robust (clustered) standard erros by clustering on ResponseId
cluster_se <- vcovCL(TA11, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA11, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA11)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A11
# Generate output for table A11
screenreg(list(TA11),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA11), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A11; Fairness Perceptions; Outcome 2"
)

rm(list = c("cluster_se", "cluster_se_values", "coef_values"))

# Results of linear regression model for fairness perceptions of female respondents
# Fit a linear model predicting CON_RAT based on Gender representation for female respondents
TA12 <- lm(COND_RAT ~ Gender, data = fem_f)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA12, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA12, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA12)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A12 
# Generate output for table A12
screenreg(list(TA12),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA12), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A12; Fairness Perceptions; Female Resp"
)

rm(list = c("cluster_se", "cluster_se_values", "coef_values"))

#-----------------------------------------------------------------------------------------------------------#
####  8. Results: Interaction Gender Representation and Men's Gender Consciousness; Tables A15 and A16   ####
#-----------------------------------------------------------------------------------------------------------#

# Results of linear regression model for output expectations; Gender representation in interaction with group consciousness

# Fit a linear model predicting REP_RAT based on Gender representation in interaction with group consciousness (PGI)
TA15 <- lm(REP_RAT ~ Gender*PGI_MAL_split, data = mal_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA15, cluster = ~ResponseId)


# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA15, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA15)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A15 
# Generate output for table A15
screenreg(list(TA15),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA15), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A15; Output expectations; Gender consciousness"
)

rm(list = c("cluster_se", "cluster_se_values", "coef_values"))


# Results of linear regression model for fairness perceptions; Gender representation in interaction with group consciousness

# Fit a linear model predicting COND_RAT based on Gender representation in interaction with group consciousness (PGI)
TA16 <- lm(COND_RAT ~ Gender*PGI_MAL_split, data = mal_f)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA16, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA16, vcov = cluster_se)

# Extract model coefficients 
coef_values <- coef(TA16)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A16 
# Generate output for table A16
screenreg(list(TA16),
          override.se = cluster_se_values,   # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA16), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A16; Fairness perceptions; Gender consciousness"
)

# Remove all objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "coef_values"))


#----------------------------------------------------------------------------------------------------------------------------#
####  9. Additional Results: Interaction Gender Representation & Ideological Self-Placement; Fig A3, Tables A17 and A18   ####
#----------------------------------------------------------------------------------------------------------------------------#

# Fit a linear model predicting REP_RAT based on Gender representation in interaction with ideological self-placement
TA17 <- lm(REP_RAT ~Gender*LR_split, data = mal_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA17, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA17, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA17)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A17
# Generate output for table A17
screenreg(list(TA17), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA17), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A17; Output expectations; Gender Representation*Ideological Self-Placement")


# To plot these effects and generate Figure A3 I use the emmeans package
# To use the robust/clustered standard errors with the emmeans package 
# the SEs need to be adjusted in the emmeans output manually. See the following steps: 

# Estimate marginal means (aka least-squares means) for each level of women's rep * group of ideological self-placement
em <- emmeans(TA17, ~ Gender*LR_split)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmeans summary for the correct names
names(em_summary)

# Extract the linear function (constrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 966
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmeans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df 

# Compute the critical t-value for the 95% confidence interval 
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limites using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary

# After adjusting the standard errors I continue with changing the levels of the treatment to increase the distance between 20 and 50% women
# This way the distance between 20 and 50% women is larger than the distance between 50% and 65% women in Figure 3

em_summary_relevel <- em_summary 
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "20% Women\n80% Men"] <- 0
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "50% Women\n50% Men"] <- 2
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "65% Women\n35% Men"] <- 3

FIGA3_LEFT <- ggplot(em_summary_relevel, aes(x = Gender_Nlevel, y = emmean, color = LR_split, group = LR_split)) +
  # Create a ggplot object using 'em_summary_relevel' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean', the ideological self-placement
  
  geom_point(position = position_dodge(0.2), size = 2) +  
  # Add points to represent estimated marginal means

  geom_line(position = position_dodge(0.2), size = 0.6, linetype = 2) +
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), size = 0.8,
                width = 0.04, position = position_dodge(0.2)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(y = "How much will the assembly's decisions be in the interest of people like you?", 
       title = "Men's Output Expectations", 
  ) +
  # Add axis labels and plot title
  
  theme_minimal() +
  # Use a clean, minimal theme for the plot
  
  theme(legend.position = "top", 
        # Place legend at the top of the graph
        
        axis.line.x = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the x-axis
        
        axis.line.y = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the y-axis
        
        axis.ticks =  element_line(linewidth = 0.3, colour = "black"), 
        # Add black tick marks on both axes
        
        axis.text.x = element_text(colour = "black"),
        # Make x-axis labels text black and increase font size for readability
        
        axis.title.x = element_blank(), 
        # Remove x-axis title
        
        plot.title = element_text(hjust = 0.5), 
        # Center the plot title horizontally
        
  ) +
  scale_colour_manual(values = c("#000000", "#909497", "red")) +
  # Set colors for the the three groups (Left-wing; 5; Right-wing)
  
  guides(
    color = guide_legend(title = "Left, Moderate, Right")
  ) +
  # Provide a title for the legend
  
  scale_x_continuous(breaks = c(0, 2, 3), 
                     # Set specific breaks on the x-axis corresponding to coded levels
                     
                     labels = c("20% Women\n80% Men", "50% Women\n50% Men", "65% Women\n35% Men"),
                     # Customize labels for each x break, using line breaks (\n) for formatting
                     
                     expand = c(0, 1), 
                     # Prevent the axis from expanding beyond the data range unnecessarily
                     
                     minor_breaks = NULL)
# Remove minor grid lines for a cleaner look

FIGA3_LEFT

# Remove objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))


# Fit a linear model predicting COND_RAT based on Gender representation in interaction with ideological majorities
TA18 <- lm(COND_RAT ~Gender*LR_split, data = mal_f)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA18, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA18, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA18)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A17
# Generate output for table A17
screenreg(list(TA18), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA18), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A18; Fairness Perceptions; Gender Representation*Ideological Self-Placement")


# To plot these effects and generate Figure A3 I use the emmeans package
# To use the robust/clustered standard errors with the emmeans package 
# the SEs need to be adjusted in the emmeans output manually. See the following steps: 

# Estimate marginal means (aka least-squares means) for each level of women's rep * group of ideological self-placement
em <- emmeans(TA18, ~ Gender*LR_split)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmeans summary for the correct names
names(em_summary)

# Extract the linear function (constrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 1108
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmeans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df 

# Compute the critical t-value for the 95% confidence interval 
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limits using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary

# After adjusting the standard errors I continue with changing the levels of the treatment to increase the distance between 20 and 50% women
# This way the distance between 20 and 50% women is larger than the distance between 50% and 65% women in Figure 3

em_summary_relevel <- em_summary 
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "20% Women\n80% Men"] <- 0
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "50% Women\n50% Men"] <- 2
em_summary_relevel$Gender_Nlevel[em_summary_relevel$Gender == "65% Women\n35% Men"] <- 3

FIGA3_RIGHT <- ggplot(em_summary_relevel, aes(x = Gender_Nlevel, y = emmean, color = LR_split, group = LR_split)) +
  # Create a ggplot object using 'em_summary_relevel' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean', the ideological self-placement
  
  geom_point(position = position_dodge(0.2), size = 2) +  
  # Add points to represent estimated marginal means
  
  geom_line(position = position_dodge(0.2), size = 0.6, linetype = 2) +
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), size = 0.8,
                width = 0.04, position = position_dodge(0.2)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(y = "How fair do you think will the assembly’s decisions be?", 
       title = "Men's Fairness Perceptions", 
  ) +
  # Add axis labels and plot title
  
  theme_minimal() +
  # Use a clean, minimal theme for the plot
  
  theme(legend.position = "top", 
        # Place legend at the top of the graph
        
        axis.line.x = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the x-axis
        
        axis.line.y = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the y-axis
        
        axis.ticks =  element_line(linewidth = 0.3, colour = "black"), 
        # Add black tick marks on both axes
        
        axis.text.x = element_text(colour = "black"),
        # Make x-axis labels text black and increase font size for readability
        
        axis.title.x = element_blank(), 
        # Remove x-axis title
        
        plot.title = element_text(hjust = 0.5), 
        # Center the plot title horizontally
        
  ) +
  scale_colour_manual(values = c("#000000", "#909497", "red")) +
  # Set colors for the the three groups (Left-wing; 5; Right-wing)
  
  guides(
    color = guide_legend(title = "Left, Moderate, Right")
  ) +
  # Provide a title for the legend
  
  scale_x_continuous(breaks = c(0, 2, 3), 
                     # Set specific breaks on the x-axis corresponding to coded levels
                     
                     labels = c("20% Women\n80% Men", "50% Women\n50% Men", "65% Women\n35% Men"),
                     # Customize labels for each x break, using line breaks (\n) for formatting
                     
                     expand = c(0, 1), 
                     # Prevent the axis from expanding beyond the data range unnecessarily
                     
                     minor_breaks = NULL)
# Remove minor grid lines for a cleaner look

FIGA3_RIGHT

#FIGURE A3
# Generate Figure A3
FIGA3 <- ggarrange(FIGA3_LEFT, FIGA3_RIGHT, common.legend = T)
FIGA3

ggsave("Output_Figures/FIGA3.png", width = 33, height = 22, scale = 0.95, units = "cm", bg = "white")

# remove all the objects that have been created in the process of adjusting robust SEs
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))


#----------------------------------------------------------------------------------------------------------------------------#
####  10. Additional Results: Interaction Gender Representation & Ideological Majorities; Table A19   ####
#----------------------------------------------------------------------------------------------------------------------------#

# Fit a linear model predicting REP_RAT based on Gender representation in interaction with ideological majorities
TA19 <- lm(REP_RAT ~Gender*Ideology, data = mal_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA19, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA19, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA19)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A19
# Generate output for table A19
screenreg(list(TA19), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA19), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A19; Output expectations; Gender*Ideological Representation")

# Remove all objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "coef_values"))

#----------------------------------------------------------------------------------------------------------------------------#
####  11. Additional Results: Interaction Gender Representation & Trust in Parliament; Table A20 & A21   ####
#----------------------------------------------------------------------------------------------------------------------------#

# Fit a linear model predicting REP_RAT based on Gender representation in interaction with Trust in Parliament
TA20 <- lm(REP_RAT ~Gender*T_PARL_split, data = mal_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA20, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA20, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA20)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A20
# Generate output for table A20
screenreg(list(TA20), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA20), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A20; Output expectations; Gender Representation*Trust in Parliament")



# Fit a linear model predicting COND_RAT based on Gender representation in interaction with Trust in Parliament
TA21 <- lm(COND_RAT ~Gender*T_PARL_split, data = mal_f)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA21, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA21, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA21)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A21
# Generate output for table A21
screenreg(list(TA21), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA21), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A21; Fairness Perceptions; Gender Representation*Trust in Parliament")

# Remove all objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "coef_values"))

#----------------------------------------------------------------------------------------------------------------------------#
####  12. Robustness: Models excluding excluding men born between 1946 and 1964; Tables A22 and A23   ####
#----------------------------------------------------------------------------------------------------------------------------#

# Subset the sample to exclude men born between 1946 and 1964
mal_o_robust <- mal_o %>% 
  filter(AGE < 1946 | AGE > 1964)

# Fit a linear model predicting REP_RAT based on Gender representation for younger and very old men
TA22 <- lm(REP_RAT ~Gender, data = mal_o_robust)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA22, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA22, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA22)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A22
# Generate output for table A22
screenreg(list(TA22), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA22), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A22; Output expectations; Younger and very old men")

# Remove all objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "coef_values"))

# And for fairness perceptions
# Subset the sample to exclude men born between 1946 and 1964
mal_f_robust <- mal_f %>% 
  filter(AGE < 1946 | AGE > 1964)

# Fit a linear model predicting COND_RAT based on Gender representation for younger and very old men
TA23 <- lm(COND_RAT ~Gender, data = mal_f_robust)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA23, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA23, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA23)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A23
# Generate output for table A23
screenreg(list(TA23), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA23), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A23; Fairness Perceptions; Younger and very old men")

# Remove all objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "coef_values"))

#----------------------------------------------------------------------------------------------------------------------------#
####  13. Benchmark: Interaction Ideological self-placement and ideological majorities   ####
#----------------------------------------------------------------------------------------------------------------------------#

# First load the data on all respondents (all genders)
load("resp_dat_all.RData") # respondent level data 

# merge "all" and "o" to estimate models on all respondents' output expectations
all_o <- merge(all, o, by = "ResponseId", all.x = T) 

# Create another set of attribute level variables with more readable labels in the output expectations dataset

all_o$Ideology <- all_o$ideology
all_o$Ideology <- recode(all_o$ideology, 
                         ideology_righthigh = "40% Left-wing\n60% Right-wing", 
                         ideology_equal = "50% Left-wing\n50% Right-wing", 
                         ideology_lefthigh = "60% Left-wing\n40% Right-wing")
class(all_o$Ideology)

# Fit a linear model predicting REP_RAT based on ideological majorities and left-right self-placement
TA24 <- lm(REP_RAT ~ Ideology*LR_split, data = all_o)

# Compute robust (clustered) standard errors by clustering on ResponseId
cluster_se <- vcovCL(TA24, cluster = ~ResponseId)

# Perform a hypothesis test on the coefficients using the clustered standard errors
coeftest(TA24, vcov = cluster_se)

# Extract model coefficients
coef_values <- coef(TA24)

# Extract the cluster-robust standard errors for each coefficient
cluster_se_values <- sqrt(diag(cluster_se))

#TABLE A24
# Generate output for table A24
screenreg(list(TA24), 
          override.se = cluster_se_values, # Provide cluster-robust SEs
          override.pvalues = 2 * pt(abs(coef_values / cluster_se_values), 
                                    df = df.residual(TA24), 
                                    lower.tail = FALSE), # Adjust p-values
          custom.model.names = "Table A24; Output expectations; Ideological Majorities * Left-Right Self-Placement")

# To plot these effects and generate Figure A4 I use the emmeans package
# To use the robust/clustered standard errors with the emmeans package 
# the SEs need to be adjusted in the emmeans output manually. See the following steps: 


# Estimate marginal means for each level of ideological majorities * group of ideological self-placement
em <- emmeans(TA24, ~ Ideology*LR_split)

# Summarize the estimated marginal means
em_summary <- summary(em)

# Display the column names of the emmeans summary for the correct names
names(em_summary)

# Extract the linear function (constrast matrix) used to compute marginal means
X <- em@linfct

# Compute corrected standard errors for the marginal means using the cluster-robust covariance matrix
# cluster_se was generated in line 1108
corrected_se <- sqrt(diag(X %*% cluster_se %*% t(X)))

# Add the corrected standard errors to the emmeans summary output
em_summary$SE <- corrected_se

# Extract degrees of freedom associated with each estimate
df <- em_summary$df 

# Compute the critical t-value for the 95% confidence interval 
t_val <- qt(0.975, df)

# Calculate lower and upper 95% confidence limits using corrected standard errors
em_summary$lower.CL <- em_summary$emmean - t_val * corrected_se
em_summary$upper.CL <- em_summary$emmean + t_val * corrected_se

# Recalculate p-values using the corrected standard errors
em_summary$p.value <- 2 * pt(abs(em_summary$emmean / corrected_se), 
                             df = df, lower.tail = FALSE)

# Display the updated emmeans summary with corrected SEs, CIs, and p-values
em_summary


FIGA4 <- ggplot(em_summary, aes(x = Ideology, y = emmean, 
                                        color = LR_split, group = LR_split)) +
  # Create a ggplot object using 'em_summary' dataset
  # Set the aesthetics: x-axis is 'Gender_Nlevel', y-axis is 'emmean', the ideological self-placement
  
  geom_point(position = position_dodge(0.2), size = 2) +  
  # Add points to represent estimated marginal means
  
  geom_line(position = position_dodge(0.2), size = 0.6, linetype = 2) +
  # Add dashed lines connecting the points, dodged slightly for clarity
  
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), size = 0.8,
                width = 0.04, position = position_dodge(0.2)) +  
  # Add error bars representing confidence intervals (lower.CL to upper.CL)
  # Slightly wider dodge for error bars, so they don’t overlap with points
  
  labs(y = "How much will the assembly's decisions be in the interest of people like you?", 
       title = "All respondents' output expectations", 
  ) +
  # Add axis labels and plot title
  
  theme_minimal() +
  # Use a clean, minimal theme for the plot
  
  theme(legend.position = "top", 
        # Place legend at the top of the graph
        
        axis.line.x = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the x-axis
        
        axis.line.y = element_line(linewidth = 0.2, colour = "black"),
        # Add a thin black line to the y-axis
        
        axis.ticks =  element_line(linewidth = 0.3, colour = "black"), 
        # Add black tick marks on both axes
        
        axis.text.x = element_text(colour = "black"),
        # Make x-axis labels text black and increase font size for readability
        
        axis.title.x = element_blank(), 
        # Remove x-axis title
        
        plot.title = element_text(hjust = 0.5), 
        # Center the plot title horizontally
        
  ) +
  scale_colour_manual(values = c("#000000", "#909497", "red")) +
  # Set colors for the the three groups (Left-wing; 5; Right-wing)
  
  guides(
    color = guide_legend(title = "Left, Moderate, Right")
  ) 

#FIGURE A4
# Generate Figure A4
FIGA4

ggsave("Output_Figures/FIGA4.png", width = 17, height = 22, scale = 0.95, units = "cm", bg = "white")

# Remove all objects created in the process of estimating robust SEs
rm(list = c("cluster_se", "cluster_se_values", "em", "em_summary", "em_summary_relevel", "X", "t_val", "df", "corrected_se", "coef_values"))


#----------------------------------------------------------------------------------------------------------------------------#
####  14. List of open-question responses; Tables A25 & A26   ####
#----------------------------------------------------------------------------------------------------------------------------#

# draw a sample of 200 responses to the open response question: “If you think about the women in the parliament you just saw, what kind of people do you think of?”
open_qu <- sample(mal$IMP_GENDERREP_WOM, 200)

# The list contains a lot of NAs because only a random subset of respondents was asked this question; Hence, I remove all NAs from the list
open_qu_clean <- open_qu[!is.na(open_qu)]

# Generate the xtable output; 
# Note that the output will be different from the Table A25 and A26 in the Supplmentary Material because drawing from the responses was random
#TABLE A25 and A26
xtable(table(open_qu_clean))

#----------------------------------------#
####             END OF FILE          ####
#----------------------------------------#



